library(tidyverse)
library(haven)
library(mice)
library(ggplot2)
library(lubridate)

load("D:\\UKBdata\\dputdata\\df15.Rdata")
load("D:\\UKBdata\\dputdata\\NAFLD_SNP_risk.Rdata")

final_nafld1 <- df15
# 使用merge函数根据eid列合并两个数据框，只保留final_nafld1中的行
final_nafld11 <- merge(final_nafld1, NAFLD_SNP_risk, by = "eid", all.x = TRUE)


#计算肌肉量差值四分位
final_nafld11 <- final_nafld11 %>%
  mutate(
    muscle_change_diff = ASM_BMI_1 - ASM_BMI_0,  # 计算差值
    muscle_change4 = ntile(muscle_change_diff, 4),  # 根据差值进行四分位分组
    muscle_change3 = ntile(muscle_change_diff, 3)
  )


#握力：计算差值四分位
final_nafld11 <- final_nafld11 %>%
  mutate(
    grip_strength_diff = grip_strength1 - grip_strength0,  # 计算差值
    grip_strength4 = ntile(grip_strength_diff, 4),  # 根据差值进行四分位分组
    grip_strength3 = ntile(grip_strength_diff, 3)
  )

#BMI：计算差值四分位
final_nafld11 <- final_nafld11 %>%
  mutate(
    BMI_diff = BMI1 - BMI0,  # 计算BMI差值
    BMI_change_tertile = ntile(BMI_diff, 3),  # 三分位数分组
    BMI_change_quartile = ntile(BMI_diff, 4),  # 四分位数分组
    BMI_change_tertile_label = fct_recode(factor(BMI_change_tertile), 
                                          "T1" = "1",  # 第一组（低）
                                          "T2" = "2",  # 第二组（中）
                                          "T3" = "3"),  # 第三组（高）
    BMI_change_quartile_label = fct_recode(factor(BMI_change_quartile), 
                                           "Q1" = "1",  # 第一组（低）
                                           "Q2" = "2",  # 第二组（较低中）
                                           "Q3" = "3",  # 第三组（较高中）
                                           "Q4" = "4")  # 第四组（高）
  )
table(final_nafld11$BMI_change_tertile_label)


#计算BMI四分位
final_nafld11 <- final_nafld11 %>%
  mutate(BMI_4 = ntile(BMI0, 4),  # 根据差值进行四分位分组
         BMI_3 = ntile(BMI0, 3)
  )
table(final_nafld11$BMI_4)

final_nafld11 <- final_nafld11 %>%
  mutate(age_group=ifelse(age0>=60,1,0)) %>% 
  mutate(age_group = factor(age_group, 
                            levels = c(0,1), 
                            labels = c("<60", ">=60")))

table(final_nafld11$MET_group)

final_nafld11 <- final_nafld11 %>%
  mutate(BMI_category = case_when(
    BMI0 < 18.5 ~ 1,      # BMI < 18.5 为 1: Underweight
    BMI0 >= 18.5 & BMI0 < 25 ~ 1,  # 18.5 到 24.9 为 2: Normal weight
    BMI0 >= 25 & BMI0 < 30 ~ 2,  # 25 到 29.9 为 3: Overweight
    BMI0 >= 30 ~ 2        # BMI ≥ 30 为 4: Obesity
  )) %>%
  # 将 BMI_category 转换为因子变量，并设置标签
  mutate(BMI_category = factor(BMI_category, 
                               levels = c(1, 2), 
                               labels = c("Underweight and Normal weight", "Overweight and Obesity")))

final_nafld11 <- final_nafld11 %>%
  mutate(sex = factor(sex, 
                      levels = c(0,1), 
                      labels = c("Female", "Male")))

final_nafld11 <- final_nafld11 %>%
  mutate(NAFLD_risk_3q = factor(NAFLD_risk_3q, 
                                levels = c(1,2,3), 
                                labels = c("Low risk", "Moderate risk", "High risk")))
table(final_nafld11$NAFLD_risk_3q)

df1 <- final_nafld11 %>% 
  mutate(event_2=ifelse(Event_Status==1,1,0))



table(df1$muscle_change4)
table(df1$grip_strength4)
summary((df1$Follow_Up_Time_Years))
table(df1$Event_Status)
table(df1$BMI_change_tertile_label)

library(dplyr)
library(jstable)
library(survival)

summary(df1$FollowUp_Years_NAFLD1)
df1$FollowUp_Years_NAFLD1 <- as.numeric(df1$FollowUp_Years_NAFLD1)
df1$Event_Status <- as.numeric(df1$Event_Status)
df1$event_2 <- as.numeric(df1$event_2 )
df1$muscle_change4 <- as.factor(df1$muscle_change4)
df1$muscle_change3 <- as.factor(df1$muscle_change3)
df1$NAFLD_risk_3q <- as.factor(df1$NAFLD_risk_3q)
df1$NAFLD_risk_4q <- as.factor(df1$NAFLD_risk_4q)
table(df1$event_2)

df1$education_new <- as.factor(df1$education_new)
table(df1$education_new)
df1$smokingstatus0 <- as.factor(df1$smokingstatus0)
df1$alcoholstatus0 <- as.factor(df1$alcoholstatus0)
df1$diet0 <- as.factor(df1$diet0)
df1$central_obesity0 <- as.factor(df1$central_obesity0)
df1$T2DM0_all <- as.factor(df1$T2DM0_all)
df1$low_HDL0 <- as.factor(df1$high_TG0)
df1$race <- as.factor(df1$race)
df1$BMI_4 <- as.factor(df1$BMI_4)
df1$BMI_3 <- as.factor(df1$BMI_3)
df1$age_group <- as.factor(df1$age_group)
df1$sex <- as.factor(df1$sex)
df1$alcohol_new0 <- as.factor(df1$alcohol_new0)
table(df1$alcohol_new0)

#重命名
df1 <- df1 %>% 
  rename(BMI_change=BMI_change_tertile_label,
         Age_group=age_group,
         Sex=sex,
         BMI_category=BMI_category)


df1 <- df1 %>%
  mutate(
    Townsend = ifelse(is.na(Townsend), median(Townsend, na.rm = TRUE), Townsend),
    race = replace(race, is.na(race), 1),
    education_new = ifelse(is.na(education_new), "Other levels", education_new),
    smokingstatus0 = replace(smokingstatus0, is.na(smokingstatus0), 0)
  )

table(df1$event_2,useNA = "ifany")
table(df1$muscle_change3,useNA = "ifany")
summary(df1$Follow_Up_Time_Years)
table(df1$Age_group,useNA = "ifany")
table(df1$Sex,useNA = "ifany")
table(df1$race,useNA = "ifany")
table(df1$BMI_category,useNA = "ifany")
table(df1$BMI_change,useNA = "ifany")
table(df1$education_new,useNA = "ifany")
table(df1$smokingstatus0,useNA = "ifany")
table(df1$alcohol_new0,useNA = "ifany")
summary(df1$Townsend)
summary(df1$MET插补0)
table(df1$central_obesity0,useNA = "ifany")
table(df1$T2DM0_all,useNA = "ifany")
table(df1$low_HDL0,useNA = "ifany")
table(df1$high_TG0,useNA = "ifany")




summary(df1$ASM_BMI_0)

res<-TableSubgroupMultiCox(
  
  #指定公式
  formula=Surv(FollowUp_Years_NAFLD1,event_2)~muscle_change3,
  
  #指定哪些变量有亚组
  var_subgroups=c("Age_group","Sex","BMI_category"),
  var_cov=c("Townsend","education_new",
            "smokingstatus0","alcohol_new0","MET插补0","central_obesity0",
            "T2DM0_all","low_HDL0","high_TG0","ASM_BMI_0"),#就是在这里添加你的其他协变量！！！
  data=df1#指定你的数据
)

library(openxlsx)

res1 <- res

# 将Upper和Lower列从字符型转换为数值型
res1$Upper <- as.numeric(res1$Upper)
res1$Lower <- as.numeric(res1$Lower)

# 将Point Estimate列中的'Reference'替换为1
res1$`Point Estimate`[res1$`Point Estimate` == "Reference"] <- "1"

# 仅对Point Estimate列为1的行，将Upper和Lower列的NA值设定为1
res1$Upper[res1$`Point Estimate` == "1" & is.na(res1$Upper)] <- 1
res1$Lower[res1$`Point Estimate` == "1" & is.na(res1$Lower)] <- 1
head(res1)

# 替换Variable列中包含'_'的字符为空格
res1$Variable <- gsub("_", " ", res1$Variable)
# 定义检测函数：判断字符串是否为有效数字（允许正负号、小数点和数字）
is_numeric_str <- function(x) {
  grepl("^[-+]?\\d+(\\.\\d+)?$", x)
}

library(grid)
# 生成新列 OR(95%CI)
res2 <- res1 %>%
  mutate(
    `HR(95%CI)` = case_when(
      # 条件1：Point Estimate 为 "Reference" 时填充 "Ref"
      `Point Estimate` == "1" ~ "Ref.",
      
      # 条件2：Lower 为有效数字字符且非缺失时拼接 OR(95%CI)
      is_numeric_str(Lower) & !is.na(Lower) ~ paste0(
        `Point Estimate`, " (", Lower, "-", Upper, ")"
      ),
      
      # 其他情况设为 NA
      TRUE ~ NA_character_
    )
  )

print(res2$`Point Estimate`)

# 将Upper和Lower列从字符型转换为数值型
res2$Upper <- as.numeric(res2$Upper)
res2$Lower <- as.numeric(res2$Lower)

# 将Point Estimate列中的'Reference'替换为1
res2$`Point Estimate`[res2$`Point Estimate` == "Reference"] <- "1"

# 仅对Point Estimate列为1的行，将Upper和Lower列的NA值设定为1
res2$Upper[res2$`Point Estimate` == "1" & is.na(res1$Upper)] <- 1
res2$Lower[res2$`Point Estimate` == "1" & is.na(res1$Lower)] <- 1
head(res2)

# 替换Variable列中包含'_'的字符为空格
res2$Variable <- gsub("_", " ", res2$Variable)


plot_df<-res2
plot_df[,c(2,3,9,10,11)][is.na(plot_df[,c(2,3,9,10,11)])]<-" "
plot_df$` `<-paste(rep(" ",nrow(plot_df)),collapse=" ")
plot_df[,5:8]<-apply(plot_df[,5:8],2,as.numeric)

library(forestploter)
library(grid)

# 保存为PDF文件
pdf(file = "D:\\UKBdata\\dputdata\\forest_plot3_ASM_BMI0.pdf", width = 15, height = 25)

p<-forest(
  data=plot_df[,c(1,2,3,11,12,9,10)],
  lower=plot_df$Lower,
  upper=plot_df$Upper,
  est=plot_df$`Point Estimate`,
  ci_column=5,
  #sizes=(plot_df$estimate+0.001)*0.3,
  ref_line=1,
  xlim=c(0,2)
)
print(p)

# 关闭PDF设备
dev.off()




table(df1$NAFLD_risk_3q)

summary(df1$NAFLD_diag_date)
summary(df1$instance1_date)

 

library(lubridate)  # 用于日期计算

df1 <- df1 %>%
  mutate(
    # 计算instance1_date + 2年
    cutoff_date = instance1_date %m+% years(2),
    
    # 创建新列（核心逻辑）
    two_year_nafld = case_when(
      !is.na(NAFLD_diag_date) & NAFLD_diag_date < cutoff_date ~ 1,
      TRUE ~ 0  # 所有其他情况
    )
  ) %>%
  select(-cutoff_date)  # 移除临时列

table(df2$two_year_nafld,useNA = "ifany")

df2 <- df1 %>% 
  filter(two_year_nafld==0)


res<-TableSubgroupMultiCox(
  
  #指定公式
  formula=Surv(FollowUp_Years_NAFLD1,event_2)~muscle_change3,
  
  #指定哪些变量有亚组
  var_subgroups=c("Age_group","Sex","BMI_category"),
  var_cov=c("Townsend","education_new",
            "smokingstatus0","alcohol_new0","MET插补0","central_obesity0",
            "T2DM0_all","low_HDL0","high_TG0","ASM_BMI_0"),#就是在这里添加你的其他协变量！！！
  data=df2#指定你的数据
)

library(openxlsx)

res1 <- res

# 将Upper和Lower列从字符型转换为数值型
res1$Upper <- as.numeric(res1$Upper)
res1$Lower <- as.numeric(res1$Lower)

# 将Point Estimate列中的'Reference'替换为1
res1$`Point Estimate`[res1$`Point Estimate` == "Reference"] <- "1"

# 仅对Point Estimate列为1的行，将Upper和Lower列的NA值设定为1
res1$Upper[res1$`Point Estimate` == "1" & is.na(res1$Upper)] <- 1
res1$Lower[res1$`Point Estimate` == "1" & is.na(res1$Lower)] <- 1
head(res1)

# 替换Variable列中包含'_'的字符为空格
res1$Variable <- gsub("_", " ", res1$Variable)
# 定义检测函数：判断字符串是否为有效数字（允许正负号、小数点和数字）
is_numeric_str <- function(x) {
  grepl("^[-+]?\\d+(\\.\\d+)?$", x)
}

library(grid)
# 生成新列 OR(95%CI)
res2 <- res1 %>%
  mutate(
    `HR(95%CI)` = case_when(
      # 条件1：Point Estimate 为 "Reference" 时填充 "Ref"
      `Point Estimate` == "1" ~ "Ref.",
      
      # 条件2：Lower 为有效数字字符且非缺失时拼接 OR(95%CI)
      is_numeric_str(Lower) & !is.na(Lower) ~ paste0(
        `Point Estimate`, " (", Lower, "-", Upper, ")"
      ),
      
      # 其他情况设为 NA
      TRUE ~ NA_character_
    )
  )

print(res2$`Point Estimate`)

# 将Upper和Lower列从字符型转换为数值型
res2$Upper <- as.numeric(res2$Upper)
res2$Lower <- as.numeric(res2$Lower)

# 将Point Estimate列中的'Reference'替换为1
res2$`Point Estimate`[res2$`Point Estimate` == "Reference"] <- "1"

# 仅对Point Estimate列为1的行，将Upper和Lower列的NA值设定为1
res2$Upper[res2$`Point Estimate` == "1" & is.na(res1$Upper)] <- 1
res2$Lower[res2$`Point Estimate` == "1" & is.na(res1$Lower)] <- 1
head(res2)

# 替换Variable列中包含'_'的字符为空格
res2$Variable <- gsub("_", " ", res2$Variable)


plot_df<-res2
plot_df[,c(2,3,9,10,11)][is.na(plot_df[,c(2,3,9,10,11)])]<-" "
plot_df$` `<-paste(rep(" ",nrow(plot_df)),collapse=" ")
plot_df[,5:8]<-apply(plot_df[,5:8],2,as.numeric)

library(forestploter)
library(grid)

# 保存为PDF文件
pdf(file = "D:\\UKBdata\\dputdata\\forest_plot3_two_year_nafld.pdf", width = 15, height = 25)

p<-forest(
  data=plot_df[,c(1,2,3,11,12,9,10)],
  lower=plot_df$Lower,
  upper=plot_df$Upper,
  est=plot_df$`Point Estimate`,
  ci_column=5,
  #sizes=(plot_df$estimate+0.001)*0.3,
  ref_line=1,
  xlim=c(0,2)
)
print(p)

# 关闭PDF设备
dev.off()



table(df1$Other_Liver_Disease_Before_Baseline)



df2 <- df1 %>% 
  filter(Other_Liver_Disease_Before_Baseline==0)


res<-TableSubgroupMultiCox(
  
  #指定公式
  formula=Surv(FollowUp_Years_NAFLD1,event_2)~muscle_change3,
  
  #指定哪些变量有亚组
  var_subgroups=c("Age_group","Sex","BMI_category"),
  var_cov=c("Townsend","education_new",
            "smokingstatus0","alcohol_new0","MET插补0","central_obesity0",
            "T2DM0_all","low_HDL0","high_TG0","ASM_BMI_0"),#就是在这里添加你的其他协变量！！！
  data=df2#指定你的数据
)

library(openxlsx)

res1 <- res

# 将Upper和Lower列从字符型转换为数值型
res1$Upper <- as.numeric(res1$Upper)
res1$Lower <- as.numeric(res1$Lower)

# 将Point Estimate列中的'Reference'替换为1
res1$`Point Estimate`[res1$`Point Estimate` == "Reference"] <- "1"

# 仅对Point Estimate列为1的行，将Upper和Lower列的NA值设定为1
res1$Upper[res1$`Point Estimate` == "1" & is.na(res1$Upper)] <- 1
res1$Lower[res1$`Point Estimate` == "1" & is.na(res1$Lower)] <- 1
head(res1)

# 替换Variable列中包含'_'的字符为空格
res1$Variable <- gsub("_", " ", res1$Variable)
# 定义检测函数：判断字符串是否为有效数字（允许正负号、小数点和数字）
is_numeric_str <- function(x) {
  grepl("^[-+]?\\d+(\\.\\d+)?$", x)
}

library(grid)
# 生成新列 OR(95%CI)
res2 <- res1 %>%
  mutate(
    `HR(95%CI)` = case_when(
      # 条件1：Point Estimate 为 "Reference" 时填充 "Ref"
      `Point Estimate` == "1" ~ "Ref.",
      
      # 条件2：Lower 为有效数字字符且非缺失时拼接 OR(95%CI)
      is_numeric_str(Lower) & !is.na(Lower) ~ paste0(
        `Point Estimate`, " (", Lower, "-", Upper, ")"
      ),
      
      # 其他情况设为 NA
      TRUE ~ NA_character_
    )
  )

print(res2$`Point Estimate`)

# 将Upper和Lower列从字符型转换为数值型
res2$Upper <- as.numeric(res2$Upper)
res2$Lower <- as.numeric(res2$Lower)

# 将Point Estimate列中的'Reference'替换为1
res2$`Point Estimate`[res2$`Point Estimate` == "Reference"] <- "1"

# 仅对Point Estimate列为1的行，将Upper和Lower列的NA值设定为1
res2$Upper[res2$`Point Estimate` == "1" & is.na(res1$Upper)] <- 1
res2$Lower[res2$`Point Estimate` == "1" & is.na(res1$Lower)] <- 1
head(res2)

# 替换Variable列中包含'_'的字符为空格
res2$Variable <- gsub("_", " ", res2$Variable)


plot_df<-res2
plot_df[,c(2,3,9,10,11)][is.na(plot_df[,c(2,3,9,10,11)])]<-" "
plot_df$` `<-paste(rep(" ",nrow(plot_df)),collapse=" ")
plot_df[,5:8]<-apply(plot_df[,5:8],2,as.numeric)

library(forestploter)
library(grid)

# 保存为PDF文件
pdf(file = "D:\\UKBdata\\dputdata\\forest_plot3_other_liver.pdf", width = 15, height = 25)

p<-forest(
  data=plot_df[,c(1,2,3,11,12,9,10)],
  lower=plot_df$Lower,
  upper=plot_df$Upper,
  est=plot_df$`Point Estimate`,
  ci_column=5,
  #sizes=(plot_df$estimate+0.001)*0.3,
  ref_line=1,
  xlim=c(0,2)
)
print(p)

# 关闭PDF设备
dev.off()


library(haven)

drink1 <- read.csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_drink_new_g.csv")
str(drink1)


table(drink1$Average.weekly.red.wine.intake...Instance.0,useNA = "ifany")
table(drink1$Average.weekly.champagne.plus.white.wine.intake...Instance.0,useNA = "ifany")
table(drink1$Average.weekly.beer.plus.cider.intake...Instance.0,useNA = "ifany")
table(drink1$Average.weekly.spirits.intake...Instance.0,useNA = "ifany")
table(drink1$Average.weekly.fortified.wine.intake...Instance.0,useNA = "ifany")
table(drink1$Average.weekly.intake.of.other.alcoholic.drinks...Instance.0,useNA = "ifany")

drink2 <- drink1 %>%
  rename(
    red_wine_week_0 = "Average.weekly.red.wine.intake...Instance.0",
    champ_white_week_0 = "Average.weekly.champagne.plus.white.wine.intake...Instance.0",
    beer_cider_week_0 = "Average.weekly.beer.plus.cider.intake...Instance.0",
    spirits_week_0 = "Average.weekly.spirits.intake...Instance.0",
    fortified_week_0 = "Average.weekly.fortified.wine.intake...Instance.0",
    other_week_0 = "Average.weekly.intake.of.other.alcoholic.drinks...Instance.0"
  )


str(drink2)

drink3 <- drink2 %>% 
  dplyr::select(c("Participant.ID","red_wine_week_0","champ_white_week_0","beer_cider_week_0","spirits_week_0","fortified_week_0","other_week_0"))
str(drink3)

table(drink3$red_wine_week_0)

# 定义统一的清洗转换函数
clean_drink_var <- function(x) {
  trimmed_val <- trimws(x)  # 去除首尾空格
  converted_val <- ifelse(
    trimmed_val %in% c("", "Do not know", "Prefer not to answer") | is.na(trimmed_val),
    NA_character_,          # 转为标准缺失值
    trimmed_val
  )
  as.numeric(converted_val)  # 安全转换为数值型
}

# 批量处理饮料变量（保留ID列）
drink3_clean <- drink3 %>%
  mutate(across(
    .cols = 2:7,  # 指定需处理的列位置（从第2列到第7列）
    .fns = clean_drink_var
  ))

str(drink3_clean)

drink4 <- drink3_clean %>% 
  rename(eid=Participant.ID)

str(drink4)

# 定义目标变量（排除 ID 列）
drink_vars <- c("red_wine_week_0", "champ_white_week_0", "beer_cider_week_0",
                "spirits_week_0", "fortified_week_0", "other_week_0")

# 批量替换缺失值为 0
drink5 <- drink4 %>%
  mutate(across(
    all_of(drink_vars),
    ~ replace_na(., 0)  # 核心替换操作[3,5](@ref)
  ))
str(drink5)

# 定义每种酒类的标准饮酒量转换因子（根据你的描述）
drink_factors <- c(
  red_wine = 1.5,        # 红酒：1杯=1.5标准饮酒量
  white_wine = 1.5,      # 白葡萄酒：1杯=1.5标准饮酒量
  beer_cider = 2.0,      # 啤酒/苹果酒：1杯=2标准饮酒量
  spirits = 1.0,         # 烈酒：1杯=1标准饮酒量
  fortified = 1.25,      # 加强酒：1杯=1.25标准饮酒量
  other = 1.0            # 其他酒类：1杯=1标准饮酒量
)

# 标准饮酒量转酒精克数（1标准饮酒量=8克酒精）
standard_drink_to_grams <- 8


library(dplyr)

drink6 <- drink5 %>%
  mutate(
    # 计算每种酒类的标准饮酒量
    red_wine_std = red_wine_week_0 * drink_factors["red_wine"],
    white_wine_std = champ_white_week_0 * drink_factors["white_wine"],
    beer_cider_std = beer_cider_week_0 * drink_factors["beer_cider"],
    spirits_std = spirits_week_0 * drink_factors["spirits"],
    fortified_std = fortified_week_0 * drink_factors["fortified"],
    other_std = other_week_0 * drink_factors["other"],
    
    # 计算总标准饮酒量
    total_std_drinks = red_wine_std + white_wine_std + beer_cider_std +
      spirits_std + fortified_std + other_std,
    
    # 转换为酒精克数
    total_alc_g = total_std_drinks * standard_drink_to_grams,
    
    # 计算每周酒精消费量（克/周）
    alc_consumption_week0 = total_alc_g
  ) %>%
  dplyr::select(eid, alc_consumption_week0, everything())  # 重排列顺序

summary(drink6$alc_consumption_week0)

drink8 <- drink6 %>% 
  dplyr::select(eid, alc_consumption_week0)

# 使用merge函数根据eid列合并两个数据框，只保留df1中的行
df8 <- merge(df1, drink8, by = "eid", all.x = TRUE)

table(df2$Sex)
df8 <- df8 %>% 
  mutate(
    excessive_drinking0 = case_when(
      Sex == "Male" & alc_consumption_week0 > 210 ~ 1,  # 男性阈值
      Sex == "Female" & alc_consumption_week0 > 140 ~ 1,  # 女性阈值
      TRUE ~ 0                                       # 非过度饮酒
    )
  )
table(df8$excessive_drinking0)

df2 <- df8 %>% 
  filter(excessive_drinking0==0)


j;res<-TableSubgroupMultiCox(
  
  #指定公式
  formula=Surv(FollowUp_Years_NAFLD1,event_2)~muscle_change3,
  
  #指定哪些变量有亚组
  var_subgroups=c("Age_group","Sex","BMI_category"),
  var_cov=c("Townsend","education_new",
            "smokingstatus0","alcohol_new0","MET插补0","central_obesity0",
            "T2DM0_all","low_HDL0","high_TG0","ASM_BMI_0"),#就是在这里添加你的其他协变量！！！
  data=df2#指定你的数据
)

library(openxlsx)

res1 <- res

# 将Upper和Lower列从字符型转换为数值型
res1$Upper <- as.numeric(res1$Upper)
res1$Lower <- as.numeric(res1$Lower)

# 将Point Estimate列中的'Reference'替换为1
res1$`Point Estimate`[res1$`Point Estimate` == "Reference"] <- "1"

# 仅对Point Estimate列为1的行，将Upper和Lower列的NA值设定为1
res1$Upper[res1$`Point Estimate` == "1" & is.na(res1$Upper)] <- 1
res1$Lower[res1$`Point Estimate` == "1" & is.na(res1$Lower)] <- 1
head(res1)

# 替换Variable列中包含'_'的字符为空格
res1$Variable <- gsub("_", " ", res1$Variable)
# 定义检测函数：判断字符串是否为有效数字（允许正负号、小数点和数字）
is_numeric_str <- function(x) {
  grepl("^[-+]?\\d+(\\.\\d+)?$", x)
}

library(grid)
# 生成新列 OR(95%CI)
res2 <- res1 %>%
  mutate(
    `HR(95%CI)` = case_when(
      # 条件1：Point Estimate 为 "Reference" 时填充 "Ref"
      `Point Estimate` == "1" ~ "Ref.",
      
      # 条件2：Lower 为有效数字字符且非缺失时拼接 OR(95%CI)
      is_numeric_str(Lower) & !is.na(Lower) ~ paste0(
        `Point Estimate`, " (", Lower, "-", Upper, ")"
      ),
      
      # 其他情况设为 NA
      TRUE ~ NA_character_
    )
  )

print(res2$`Point Estimate`)

# 将Upper和Lower列从字符型转换为数值型
res2$Upper <- as.numeric(res2$Upper)
res2$Lower <- as.numeric(res2$Lower)

# 将Point Estimate列中的'Reference'替换为1
res2$`Point Estimate`[res2$`Point Estimate` == "Reference"] <- "1"

# 仅对Point Estimate列为1的行，将Upper和Lower列的NA值设定为1
res2$Upper[res2$`Point Estimate` == "1" & is.na(res1$Upper)] <- 1
res2$Lower[res2$`Point Estimate` == "1" & is.na(res1$Lower)] <- 1
head(res2)

# 替换Variable列中包含'_'的字符为空格
res2$Variable <- gsub("_", " ", res2$Variable)


plot_df<-res2
plot_df[,c(2,3,9,10,11)][is.na(plot_df[,c(2,3,9,10,11)])]<-" "
plot_df$` `<-paste(rep(" ",nrow(plot_df)),collapse=" ")
plot_df[,5:8]<-apply(plot_df[,5:8],2,as.numeric)

library(forestploter)
library(grid)

# 保存为PDF文件
pdf(file = "D:\\UKBdata\\dputdata\\forest_plot3_excessive_drinking.pdf", width = 15, height = 25)

p<-forest(
  data=plot_df[,c(1,2,3,11,12,9,10)],
  lower=plot_df$Lower,
  upper=plot_df$Upper,
  est=plot_df$`Point Estimate`,
  ci_column=5,
  #sizes=(plot_df$estimate+0.001)*0.3,
  ref_line=1,
  xlim=c(0,2)
)
print(p)

# 关闭PDF设备
dev.off()







